Removing countries
# Read the COVID dataset CSV (path is relative to the working directory).
covid_data <- read_csv(file = "./owid-covid-data.csv")
## Rows: 309688 Columns: 67
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): iso_code, continent, location, tests_units
## dbl (62): total_cases, new_cases, new_cases_smoothed, total_deaths, new_dea...
## date (1): date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only European rows; filter() also drops rows whose continent is NA.
covid_data <- covid_data %>%
  filter(continent == "Europe")

# Locations where less than 20% of total_cases_per_million is missing.
locs <- covid_data %>%
  group_by(location) %>%
  summarize(na_share = mean(is.na(total_cases_per_million))) %>%
  filter(na_share < 0.2) %>%
  select(location)

# Locations excluded by hand on top of the missing-data rule.
remove_countries <- c(
  "Vatican", "Faeroe Islands", "Guernsey", "Isle of Man", "Kosovo",
  "San Marino"
)
countries <- setdiff(locs$location, remove_countries)
n_countries <- length(countries)

# Working table: one row per date and retained location.
data <- covid_data %>%
  select(date, location, new_cases_per_million, total_cases_per_million) %>%
  filter(location %in% countries)

# One line plot of new cases per million for each retained location.
for (country in countries) {
  # Subset once per country instead of once per plotted axis.
  country_rows <- data[data$location == country, ]
  plot(
    country_rows$date,
    country_rows$new_cases_per_million,
    type = "l",
    main = country, # title identifies which location the plot shows
    xlab = "Date",
    ylab = "New cases per million"
  )
}













































Taking the log and plotting
# Replace total_cases_per_million with its natural log, in place.
# NOTE: running this line a second time takes the log of the log. Zeros become
# -Inf and NA values stay NA.
data$total_cases_per_million <- log(data$total_cases_per_million)

# Log total cases per million over time, one coloured line per location.
# The colour aesthetic already separates the lines, so no group_by() is needed.
data %>%
  ggplot(aes(x = date, y = total_cases_per_million, color = location)) +
  geom_line() +
  # The column now holds log values, so label the axis accordingly.
  labs(y = "log(total_cases_per_million)") +
  # Shrink the legend text and keys.
  theme(
    legend.text = element_text(size = 5),
    legend.key.height = unit(0.2, "cm"),
    legend.key.width = unit(0.2, "cm"),
    legend.title = element_text(size = 7)
  )
## Warning: Removed 2298 row(s) containing missing values (geom_path).
